pefe-ief Visualization¶

0. Configuration¶

Make sure the values below are correct for your setup; every later cell depends on them.

In [1]:
# Directory holding the results produced by the pefe-ief library.
# load_results() reads the file "index.msgpack" directly inside this directory.
RESULTS_DIR = "../../my_scripts/RESULTS"

1. Utilities¶

Most of the time, you don't need to tinker with this; just proceed to the next section.

a) Loading results¶

In [2]:
from pathlib import Path
import msgpack
import msgpack_numpy
from pprint import pprint
msgpack_numpy.patch()

import numpy as np
import matplotlib.pyplot as plt

def load_results(results_dir):
    # type: (str) -> list
    """Load the results index written by the pefe-ief library.

    Args:
        results_dir: Directory that contains ``index.msgpack``.

    Returns:
        The deserialized content of ``index.msgpack``. The plotting helpers
        in this notebook treat it as a list with one dict per evaluated model.

    Raises:
        FileNotFoundError: If ``index.msgpack`` does not exist in ``results_dir``.
    """
    index_file_path = Path(results_dir) / "index.msgpack"
    with index_file_path.open('rb') as index_file:
        # raw=False makes msgpack decode strings to ``str`` rather than ``bytes``.
        return msgpack.unpack(index_file, raw=False)

b) Plotting¶

In [3]:
import matplotlib.pyplot as plt

class PLOT_METRICS:
    """Bit flags selecting which metrics to plot; combine them with ``|``."""
    ROC_AUC = 1
    ACCURACY = 2
    F1 = 4
    PRECISION = 8
    RECALL = 16

# Build the "everything" mask from the public flags only. Underscore-prefixed
# entries must be skipped: the class namespace also holds interpreter metadata,
# and some of it is an int (Python 3.13 adds ``__firstlineno__``), which would
# otherwise be OR-ed into the mask.
_PLOT_METRICS_ALL = 0
for attr, value in PLOT_METRICS.__dict__.items():
    if attr.startswith('_'):
        continue
    # bool is a subclass of int; it is never a metric flag.
    if isinstance(value, int) and not isinstance(value, bool):
        _PLOT_METRICS_ALL |= value
PLOT_METRICS.ALL = _PLOT_METRICS_ALL

def plot_metrics_one_threshold(results, metrics, i_threshold):
    # type: (list[dict[str, dict[str, int|float|str]]], int, int) -> None
    """Draw one line chart comparing all models at a single threshold.

    Args:
        results: One entry per model. Each entry is read through the keys
            ``MODEL.name``, ``common_stats.roc_auc`` and
            ``stats_per_thresholds[i_threshold]``.
        metrics: Bitwise OR of ``PLOT_METRICS`` flags; only the selected
            metrics are drawn.
        i_threshold: Index into each entry's ``stats_per_thresholds`` list.
    """
    x_labels = [entry['MODEL']['name'] for entry in results]

    def per_threshold(key):
        # Reader for a statistic stored under the selected threshold entry.
        return lambda entry: entry['stats_per_thresholds'][i_threshold][key]

    # (flag, legend label, value reader), listed in drawing order.
    # ROC-AUC comes from ``common_stats`` and does not depend on the threshold.
    series = (
        (PLOT_METRICS.ROC_AUC, "ROC-AUC", lambda entry: entry['common_stats']['roc_auc']),
        (PLOT_METRICS.ACCURACY, "Accuracy", per_threshold('accuracy')),
        (PLOT_METRICS.F1, "F1", per_threshold('f1')),
        (PLOT_METRICS.PRECISION, "Precision", per_threshold('precision')),
        (PLOT_METRICS.RECALL, "Recall", per_threshold('recall')),
    )

    plt.figure(figsize=(12,6))
    for flag, legend_label, read_value in series:
        if metrics & flag:
            y_values = [read_value(entry) for entry in results]
            plt.plot(x_labels, y_values, marker='o', label=legend_label)

    plt.xticks(rotation=45, ha='right')
    plt.ylabel("Score")
    plt.title("PE Malware Detection Models' Performance")
    plt.legend(loc='lower right')
    plt.tight_layout()
    plt.show()

def plot_metrics(results, metrics):
    # type: (list[dict[str, dict[str, int|float|str]]], int) -> None
    """
    Plot the selected metrics for every model, one figure per threshold.

    The list of thresholds is taken from the first entry of ``results``; all
    entries are then indexed by the same position. An empty ``results`` draws
    nothing.

    Usage:

    plot_metrics(
        RESULTS,
        PLOT_METRICS.ROC_AUC
        | PLOT_METRICS.ACCURACY
        | PLOT_METRICS.F1
        ... # more if needed
    )
    """
    if not results:
        # Nothing to plot; also avoids an IndexError on results[0] below.
        return

    import ipywidgets as widgets
    from IPython.display import display

    for i_threshold, stats in enumerate(results[0]['stats_per_thresholds']):
        # Heading shown above each figure so the threshold is visible at a glance.
        html_label = widgets.HTML(
            value='<p style="font-size:24px;">Threshold = ' + str(stats['threshold']) + '</p>'
        )
        display(html_label)
        plot_metrics_one_threshold(results, metrics, i_threshold)

c) Displaying pre-rendered curves¶

In [4]:
import base64
from tqdm import tqdm

def to_base64_uri(path):
    """Read the file at ``path`` and return its bytes as a base64 ``data:`` URI.

    The media type in the URI is always ``image/png``; the file content is
    not inspected.
    """
    with open(path, "rb") as image_file:
        encoded = base64.b64encode(image_file.read())
    return "data:image/png;base64,{}".format(encoded.decode("utf-8"))

def display_images_in_groups(image_groups):
    """Show groups of captioned images in an accordion, one section per group.

    Args:
        image_groups: Iterable of dicts. Each dict has a ``'caption'`` (used
            as the section title) and ``'images'``, an iterable of dicts with
            a ``'url'`` and a ``'caption'``.

    Image URLs and captions are HTML-escaped before being placed in the
    markup, so characters such as ``<``, ``&`` or ``"`` in the data cannot
    break the rendered output.
    """
    from html import escape

    import ipywidgets as widgets
    from IPython.display import display, HTML

    group_outputs = []
    group_titles = []

    for image_group in tqdm(image_groups):
        # One grid cell per image: the picture with its caption underneath.
        figures_html = "".join('''
                    <div style="display: flex; flex-direction: column; gap: 5px; justify-content: center; align-items: center">
                        <img src="{}" />
                        <p>{}</p>
                    </div>'''.format(
                escape(str(img['url']), quote=True),
                escape(str(img['caption'])),
            )
            for img in image_group['images']
        )

        out = widgets.Output()
        with out:
            display(HTML('''
            <div style="display: flex; flex-direction: column; gap: 5px; justify-content: center; align-items: center">
                <div style="display: grid; grid-template-columns: auto auto;">'''
                + figures_html
                + '''
                </div>
            </div>
            '''))
        group_outputs.append(out)
        group_titles.append(image_group['caption'])

    # NOTE(review): the ``titles=`` keyword appears to require ipywidgets 8+;
    # confirm against the installed version.
    accordion = widgets.Accordion(
        children=group_outputs,
        titles=group_titles,
    )
    display(accordion)

def display_prerendered_curves(results):
    # type: (list[dict[str, dict[str, int|float|str]]]) -> None
    """Show every model's pre-rendered curve images, grouped by model.

    Args:
        results: One entry per model. ``MODEL.name`` becomes the group title;
            each item of ``curves`` supplies an image through its
            ``plot_path`` (read from disk and embedded as a data URI) and a
            caption through its ``type``.
    """
    # No local imports are needed here: all rendering is delegated to
    # display_images_in_groups(), which imports its own widget dependencies.
    display_images_in_groups([
        {
            "caption": r["MODEL"]["name"],
            "images": [
                {
                    "caption": c["type"],
                    "url": to_base64_uri(c["plot_path"])
                }
                for c in r["curves"]
            ],
        } for r in results
    ])

    

2. Your Playground¶

a) Loading pre-rendered results¶

Just run this once.

In [5]:
# Read the results index once; every plot below reuses this object.
RESULTS = load_results(RESULTS_DIR)
# Echo the loaded structure for inspection (the output is long).
RESULTS
Out[5]:
[{'dataset': {'total_count': 403032,
   'malware_count': 215930,
   'benign_count': 187102},
  'stats_per_thresholds': [{'threshold': 0.5,
    'total_hits': 240851,
    'total_misses': 162181,
    'accuracy': 0.5975977093630283,
    'TP': 55451,
    'TN': 185400,
    'FP': 1702,
    'FN': 160479,
    'precision': 0.2568008150789608,
    'recall': 0.2568008150789608,
    'f1': 0.2568008150789608},
   {'threshold': 0.6,
    'total_hits': 227248,
    'total_misses': 175784,
    'accuracy': 0.5638460469640127,
    'TP': 41199,
    'TN': 186049,
    'FP': 1053,
    'FN': 174731,
    'precision': 0.19079794377807624,
    'recall': 0.19079794377807624,
    'f1': 0.19079794377807624},
   {'threshold': 0.7,
    'total_hits': 214647,
    'total_misses': 188385,
    'accuracy': 0.5325805395105103,
    'TP': 28164,
    'TN': 186483,
    'FP': 619,
    'FN': 187766,
    'precision': 0.1304311582457278,
    'recall': 0.1304311582457278,
    'f1': 0.1304311582457278},
   {'threshold': 0.8,
    'total_hits': 203959,
    'total_misses': 199073,
    'accuracy': 0.5060615534250382,
    'TP': 17130,
    'TN': 186829,
    'FP': 273,
    'FN': 198800,
    'precision': 0.07933126476172833,
    'recall': 0.07933126476172833,
    'f1': 0.07933126476172833},
   {'threshold': 0.85,
    'total_hits': 198664,
    'total_misses': 204368,
    'accuracy': 0.49292363881776136,
    'TP': 11685,
    'TN': 186979,
    'FP': 123,
    'FN': 204245,
    'precision': 0.054114759412772655,
    'recall': 0.054114759412772655,
    'f1': 0.054114759412772655},
   {'threshold': 0.9,
    'total_hits': 192527,
    'total_misses': 210505,
    'accuracy': 0.4776965600746343,
    'TP': 5479,
    'TN': 187048,
    'FP': 54,
    'FN': 210451,
    'precision': 0.025373963784559812,
    'recall': 0.025373963784559812,
    'f1': 0.025373963784559812}],
  'curves': [{'type': 'ROC',
    'plot_path': '/home/lam/Desktop/Viettel/EMBER2024/my_scripts/RESULTS/images/EMBER2024_APK_IEF_ROC.png'},
   {'type': 'DET',
    'plot_path': '/home/lam/Desktop/Viettel/EMBER2024/my_scripts/RESULTS/images/EMBER2024_APK_IEF_DET.png'},
   {'type': 'Actual Positives',
    'plot_path': '/home/lam/Desktop/Viettel/EMBER2024/my_scripts/RESULTS/images/EMBER2024_APK_IEF_TPR_FNR_per_threshold_aka_Actual_Positives.png'},
   {'type': 'Actual Negatives',
    'plot_path': '/home/lam/Desktop/Viettel/EMBER2024/my_scripts/RESULTS/images/EMBER2024_APK_IEF_TNR_FFR_per_threshold_aka_Actual_Negatives.png'}],
  'common_stats': {'roc_auc': 0.8849607108571744},
  'MODEL': {'type': 'EMBER2024_LGBM',
   'path': '/home/lam/Desktop/Viettel/EMBER2024/models/EMBER2024_APK.model',
   'name': 'EMBER2024_APK'}},
 {'dataset': {'total_count': 403032,
   'malware_count': 215930,
   'benign_count': 187102},
  'stats_per_thresholds': [{'threshold': 0.5,
    'total_hits': 354136,
    'total_misses': 48896,
    'accuracy': 0.8786796085670616,
    'TP': 214895,
    'TN': 139241,
    'FP': 47861,
    'FN': 1035,
    'precision': 0.9952067799749919,
    'recall': 0.9952067799749919,
    'f1': 0.9952067799749919},
   {'threshold': 0.6,
    'total_hits': 358616,
    'total_misses': 44416,
    'accuracy': 0.8897953512376189,
    'TP': 214549,
    'TN': 144067,
    'FP': 43035,
    'FN': 1381,
    'precision': 0.9936044088361969,
    'recall': 0.9936044088361969,
    'f1': 0.9936044088361969},
   {'threshold': 0.7,
    'total_hits': 362956,
    'total_misses': 40076,
    'accuracy': 0.9005637269497211,
    'TP': 213981,
    'TN': 148975,
    'FP': 38127,
    'FN': 1949,
    'precision': 0.9909739267355161,
    'recall': 0.9909739267355161,
    'f1': 0.9909739267355161},
   {'threshold': 0.8,
    'total_hits': 367548,
    'total_misses': 35484,
    'accuracy': 0.9119573631870422,
    'TP': 213188,
    'TN': 154360,
    'FP': 32742,
    'FN': 2742,
    'precision': 0.9873014402815727,
    'recall': 0.9873014402815727,
    'f1': 0.9873014402815727},
   {'threshold': 0.85,
    'total_hits': 370324,
    'total_misses': 32708,
    'accuracy': 0.918845153734691,
    'TP': 212543,
    'TN': 157781,
    'FP': 29321,
    'FN': 3387,
    'precision': 0.9843143611355532,
    'recall': 0.9843143611355532,
    'f1': 0.9843143611355532},
   {'threshold': 0.9,
    'total_hits': 373079,
    'total_misses': 29953,
    'accuracy': 0.9256808392385716,
    'TP': 211123,
    'TN': 161956,
    'FP': 25146,
    'FN': 4807,
    'precision': 0.9777381558838513,
    'recall': 0.9777381558838513,
    'f1': 0.9777381558838513}],
  'curves': [{'type': 'ROC',
    'plot_path': '/home/lam/Desktop/Viettel/EMBER2024/my_scripts/RESULTS/images/EMBER2024_Dot_Net_IEF_ROC.png'},
   {'type': 'DET',
    'plot_path': '/home/lam/Desktop/Viettel/EMBER2024/my_scripts/RESULTS/images/EMBER2024_Dot_Net_IEF_DET.png'},
   {'type': 'Actual Positives',
    'plot_path': '/home/lam/Desktop/Viettel/EMBER2024/my_scripts/RESULTS/images/EMBER2024_Dot_Net_IEF_TPR_FNR_per_threshold_aka_Actual_Positives.png'},
   {'type': 'Actual Negatives',
    'plot_path': '/home/lam/Desktop/Viettel/EMBER2024/my_scripts/RESULTS/images/EMBER2024_Dot_Net_IEF_TNR_FFR_per_threshold_aka_Actual_Negatives.png'}],
  'common_stats': {'roc_auc': 0.9805540385458298},
  'MODEL': {'type': 'EMBER2024_LGBM',
   'path': '/home/lam/Desktop/Viettel/EMBER2024/models/EMBER2024_Dot_Net.model',
   'name': 'EMBER2024_Dot_Net'}},
 {'dataset': {'total_count': 403032,
   'malware_count': 215930,
   'benign_count': 187102},
  'stats_per_thresholds': [{'threshold': 0.5,
    'total_hits': 240296,
    'total_misses': 162736,
    'accuracy': 0.5962206474920105,
    'TP': 54514,
    'TN': 185782,
    'FP': 1320,
    'FN': 161416,
    'precision': 0.25246144583892927,
    'recall': 0.25246144583892927,
    'f1': 0.25246144583892927},
   {'threshold': 0.6,
    'total_hits': 236654,
    'total_misses': 166378,
    'accuracy': 0.5871841441870621,
    'TP': 50412,
    'TN': 186242,
    'FP': 860,
    'FN': 165518,
    'precision': 0.2334645486963368,
    'recall': 0.2334645486963368,
    'f1': 0.2334645486963368},
   {'threshold': 0.7,
    'total_hits': 230413,
    'total_misses': 172619,
    'accuracy': 0.5716990214176542,
    'TP': 43830,
    'TN': 186583,
    'FP': 519,
    'FN': 172100,
    'precision': 0.2029824480155606,
    'recall': 0.2029824480155606,
    'f1': 0.2029824480155606},
   {'threshold': 0.8,
    'total_hits': 218136,
    'total_misses': 184896,
    'accuracy': 0.5412374203537188,
    'TP': 31332,
    'TN': 186804,
    'FP': 298,
    'FN': 184598,
    'precision': 0.14510257953966563,
    'recall': 0.14510257953966563,
    'f1': 0.14510257953966563},
   {'threshold': 0.85,
    'total_hits': 214488,
    'total_misses': 188544,
    'accuracy': 0.532186029893408,
    'TP': 27603,
    'TN': 186885,
    'FP': 217,
    'FN': 188327,
    'precision': 0.12783309405825963,
    'recall': 0.12783309405825963,
    'f1': 0.12783309405825963},
   {'threshold': 0.9,
    'total_hits': 210191,
    'total_misses': 192841,
    'accuracy': 0.5215243454614026,
    'TP': 23265,
    'TN': 186926,
    'FP': 176,
    'FN': 192665,
    'precision': 0.10774325012735608,
    'recall': 0.10774325012735608,
    'f1': 0.1077432501273561}],
  'curves': [{'type': 'ROC',
    'plot_path': '/home/lam/Desktop/Viettel/EMBER2024/my_scripts/RESULTS/images/EMBER2024_ELF_IEF_ROC.png'},
   {'type': 'DET',
    'plot_path': '/home/lam/Desktop/Viettel/EMBER2024/my_scripts/RESULTS/images/EMBER2024_ELF_IEF_DET.png'},
   {'type': 'Actual Positives',
    'plot_path': '/home/lam/Desktop/Viettel/EMBER2024/my_scripts/RESULTS/images/EMBER2024_ELF_IEF_TPR_FNR_per_threshold_aka_Actual_Positives.png'},
   {'type': 'Actual Negatives',
    'plot_path': '/home/lam/Desktop/Viettel/EMBER2024/my_scripts/RESULTS/images/EMBER2024_ELF_IEF_TNR_FFR_per_threshold_aka_Actual_Negatives.png'}],
  'common_stats': {'roc_auc': 0.757958249459181},
  'MODEL': {'type': 'EMBER2024_LGBM',
   'path': '/home/lam/Desktop/Viettel/EMBER2024/models/EMBER2024_ELF.model',
   'name': 'EMBER2024_ELF'}},
 {'dataset': {'total_count': 403032,
   'malware_count': 215930,
   'benign_count': 187102},
  'stats_per_thresholds': [{'threshold': 0.5,
    'total_hits': 213702,
    'total_misses': 189330,
    'accuracy': 0.5302358125409397,
    'TP': 150702,
    'TN': 63000,
    'FP': 124102,
    'FN': 65228,
    'precision': 0.6979206224239337,
    'recall': 0.6979206224239337,
    'f1': 0.6979206224239337},
   {'threshold': 0.6,
    'total_hits': 215851,
    'total_misses': 187181,
    'accuracy': 0.5355678953532226,
    'TP': 141459,
    'TN': 74392,
    'FP': 112710,
    'FN': 74471,
    'precision': 0.6551150835919047,
    'recall': 0.6551150835919047,
    'f1': 0.6551150835919047},
   {'threshold': 0.7,
    'total_hits': 218203,
    'total_misses': 184829,
    'accuracy': 0.5414036602552651,
    'TP': 130686,
    'TN': 87517,
    'FP': 99585,
    'FN': 85244,
    'precision': 0.60522391515769,
    'recall': 0.60522391515769,
    'f1': 0.60522391515769},
   {'threshold': 0.8,
    'total_hits': 220010,
    'total_misses': 183022,
    'accuracy': 0.5458871752118939,
    'TP': 116359,
    'TN': 103651,
    'FP': 83451,
    'FN': 99571,
    'precision': 0.5388737090723845,
    'recall': 0.5388737090723845,
    'f1': 0.5388737090723845},
   {'threshold': 0.85,
    'total_hits': 219527,
    'total_misses': 183505,
    'accuracy': 0.5446887592052244,
    'TP': 105572,
    'TN': 113955,
    'FP': 73147,
    'FN': 110358,
    'precision': 0.48891770481174457,
    'recall': 0.48891770481174457,
    'f1': 0.48891770481174457},
   {'threshold': 0.9,
    'total_hits': 210233,
    'total_misses': 192799,
    'accuracy': 0.521628555548939,
    'TP': 83460,
    'TN': 126773,
    'FP': 60329,
    'FN': 132470,
    'precision': 0.38651414810355206,
    'recall': 0.38651414810355206,
    'f1': 0.38651414810355206}],
  'curves': [{'type': 'ROC',
    'plot_path': '/home/lam/Desktop/Viettel/EMBER2024/my_scripts/RESULTS/images/EMBER2024_PDF_IEF_ROC.png'},
   {'type': 'DET',
    'plot_path': '/home/lam/Desktop/Viettel/EMBER2024/my_scripts/RESULTS/images/EMBER2024_PDF_IEF_DET.png'},
   {'type': 'Actual Positives',
    'plot_path': '/home/lam/Desktop/Viettel/EMBER2024/my_scripts/RESULTS/images/EMBER2024_PDF_IEF_TPR_FNR_per_threshold_aka_Actual_Positives.png'},
   {'type': 'Actual Negatives',
    'plot_path': '/home/lam/Desktop/Viettel/EMBER2024/my_scripts/RESULTS/images/EMBER2024_PDF_IEF_TNR_FFR_per_threshold_aka_Actual_Negatives.png'}],
  'common_stats': {'roc_auc': 0.5415724612145769},
  'MODEL': {'type': 'EMBER2024_LGBM',
   'path': '/home/lam/Desktop/Viettel/EMBER2024/models/EMBER2024_PDF.model',
   'name': 'EMBER2024_PDF'}},
 {'dataset': {'total_count': 403032,
   'malware_count': 215930,
   'benign_count': 187102},
  'stats_per_thresholds': [{'threshold': 0.5,
    'total_hits': 394027,
    'total_misses': 9005,
    'accuracy': 0.977656860993668,
    'TP': 210573,
    'TN': 183454,
    'FP': 3648,
    'FN': 5357,
    'precision': 0.9751910341314315,
    'recall': 0.9751910341314315,
    'f1': 0.9751910341314315},
   {'threshold': 0.6,
    'total_hits': 393710,
    'total_misses': 9322,
    'accuracy': 0.9768703229520237,
    'TP': 209122,
    'TN': 184588,
    'FP': 2514,
    'FN': 6808,
    'precision': 0.9684712638355022,
    'recall': 0.9684712638355022,
    'f1': 0.9684712638355022},
   {'threshold': 0.7,
    'total_hits': 392597,
    'total_misses': 10435,
    'accuracy': 0.9741087556323071,
    'TP': 207214,
    'TN': 185383,
    'FP': 1719,
    'FN': 8716,
    'precision': 0.9596350669198351,
    'recall': 0.9596350669198351,
    'f1': 0.9596350669198351},
   {'threshold': 0.8,
    'total_hits': 387593,
    'total_misses': 15439,
    'accuracy': 0.9616928680601045,
    'TP': 201490,
    'TN': 186103,
    'FP': 999,
    'FN': 14440,
    'precision': 0.9331264761728338,
    'recall': 0.9331264761728338,
    'f1': 0.9331264761728338},
   {'threshold': 0.85,
    'total_hits': 382681,
    'total_misses': 20351,
    'accuracy': 0.9495052502034578,
    'TP': 196317,
    'TN': 186364,
    'FP': 738,
    'FN': 19613,
    'precision': 0.9091696383087111,
    'recall': 0.9091696383087111,
    'f1': 0.9091696383087111},
   {'threshold': 0.9,
    'total_hits': 370047,
    'total_misses': 32985,
    'accuracy': 0.9181578633954623,
    'TP': 183464,
    'TN': 186583,
    'FP': 519,
    'FN': 32466,
    'precision': 0.8496457185198907,
    'recall': 0.8496457185198907,
    'f1': 0.8496457185198908}],
  'curves': [{'type': 'ROC',
    'plot_path': '/home/lam/Desktop/Viettel/EMBER2024/my_scripts/RESULTS/images/EMBER2024_PE_IEF_ROC.png'},
   {'type': 'DET',
    'plot_path': '/home/lam/Desktop/Viettel/EMBER2024/my_scripts/RESULTS/images/EMBER2024_PE_IEF_DET.png'},
   {'type': 'Actual Positives',
    'plot_path': '/home/lam/Desktop/Viettel/EMBER2024/my_scripts/RESULTS/images/EMBER2024_PE_IEF_TPR_FNR_per_threshold_aka_Actual_Positives.png'},
   {'type': 'Actual Negatives',
    'plot_path': '/home/lam/Desktop/Viettel/EMBER2024/my_scripts/RESULTS/images/EMBER2024_PE_IEF_TNR_FFR_per_threshold_aka_Actual_Negatives.png'}],
  'common_stats': {'roc_auc': 0.9972285487356171},
  'MODEL': {'type': 'EMBER2024_LGBM',
   'path': '/home/lam/Desktop/Viettel/EMBER2024/models/EMBER2024_PE.model',
   'name': 'EMBER2024_PE'}},
 {'dataset': {'total_count': 403032,
   'malware_count': 215930,
   'benign_count': 187102},
  'stats_per_thresholds': [{'threshold': 0.5,
    'total_hits': 393286,
    'total_misses': 9746,
    'accuracy': 0.9758182973064173,
    'TP': 208294,
    'TN': 184992,
    'FP': 2110,
    'FN': 7636,
    'precision': 0.9646366878154957,
    'recall': 0.9646366878154957,
    'f1': 0.9646366878154957},
   {'threshold': 0.6,
    'total_hits': 392454,
    'total_misses': 10578,
    'accuracy': 0.973753945096171,
    'TP': 206860,
    'TN': 185594,
    'FP': 1508,
    'FN': 9070,
    'precision': 0.9579956467373686,
    'recall': 0.9579956467373686,
    'f1': 0.9579956467373686},
   {'threshold': 0.7,
    'total_hits': 390935,
    'total_misses': 12097,
    'accuracy': 0.9699850135969352,
    'TP': 204909,
    'TN': 186026,
    'FP': 1076,
    'FN': 11021,
    'precision': 0.9489603112119669,
    'recall': 0.9489603112119669,
    'f1': 0.9489603112119669},
   {'threshold': 0.8,
    'total_hits': 388048,
    'total_misses': 14984,
    'accuracy': 0.9628218106750829,
    'TP': 201664,
    'TN': 186384,
    'FP': 718,
    'FN': 14266,
    'precision': 0.9339322928726902,
    'recall': 0.9339322928726902,
    'f1': 0.9339322928726902},
   {'threshold': 0.85,
    'total_hits': 385104,
    'total_misses': 17928,
    'accuracy': 0.9555171797772881,
    'TP': 198528,
    'TN': 186576,
    'FP': 526,
    'FN': 17402,
    'precision': 0.9194090677534387,
    'recall': 0.9194090677534387,
    'f1': 0.9194090677534387},
   {'threshold': 0.9,
    'total_hits': 374195,
    'total_misses': 28837,
    'accuracy': 0.9284498501359694,
    'TP': 187446,
    'TN': 186749,
    'FP': 353,
    'FN': 28484,
    'precision': 0.8680868800074099,
    'recall': 0.8680868800074099,
    'f1': 0.8680868800074099}],
  'curves': [{'type': 'ROC',
    'plot_path': '/home/lam/Desktop/Viettel/EMBER2024/my_scripts/RESULTS/images/EMBER2024_Win32_IEF_ROC.png'},
   {'type': 'DET',
    'plot_path': '/home/lam/Desktop/Viettel/EMBER2024/my_scripts/RESULTS/images/EMBER2024_Win32_IEF_DET.png'},
   {'type': 'Actual Positives',
    'plot_path': '/home/lam/Desktop/Viettel/EMBER2024/my_scripts/RESULTS/images/EMBER2024_Win32_IEF_TPR_FNR_per_threshold_aka_Actual_Positives.png'},
   {'type': 'Actual Negatives',
    'plot_path': '/home/lam/Desktop/Viettel/EMBER2024/my_scripts/RESULTS/images/EMBER2024_Win32_IEF_TNR_FFR_per_threshold_aka_Actual_Negatives.png'}],
  'common_stats': {'roc_auc': 0.9963488271370166},
  'MODEL': {'type': 'EMBER2024_LGBM',
   'path': '/home/lam/Desktop/Viettel/EMBER2024/models/EMBER2024_Win32.model',
   'name': 'EMBER2024_Win32'}},
 {'dataset': {'total_count': 403032,
   'malware_count': 215930,
   'benign_count': 187102},
  'stats_per_thresholds': [{'threshold': 0.5,
    'total_hits': 381282,
    'total_misses': 21750,
    'accuracy': 0.9460340618114691,
    'TP': 205639,
    'TN': 175643,
    'FP': 11459,
    'FN': 10291,
    'precision': 0.9523410364469967,
    'recall': 0.9523410364469967,
    'f1': 0.9523410364469967},
   {'threshold': 0.6,
    'total_hits': 380867,
    'total_misses': 22165,
    'accuracy': 0.9450043668989063,
    'TP': 202918,
    'TN': 177949,
    'FP': 9153,
    'FN': 13012,
    'precision': 0.9397397304682072,
    'recall': 0.9397397304682072,
    'f1': 0.9397397304682072},
   {'threshold': 0.7,
    'total_hits': 377698,
    'total_misses': 25334,
    'accuracy': 0.9371414676750234,
    'TP': 197580,
    'TN': 180118,
    'FP': 6984,
    'FN': 18350,
    'precision': 0.9150187560783587,
    'recall': 0.9150187560783587,
    'f1': 0.9150187560783587},
   {'threshold': 0.8,
    'total_hits': 367033,
    'total_misses': 35999,
    'accuracy': 0.9106795490184402,
    'TP': 184787,
    'TN': 182246,
    'FP': 4856,
    'FN': 31143,
    'precision': 0.855772704117075,
    'recall': 0.855772704117075,
    'f1': 0.855772704117075},
   {'threshold': 0.85,
    'total_hits': 363299,
    'total_misses': 39733,
    'accuracy': 0.9014147759979356,
    'TP': 179876,
    'TN': 183423,
    'FP': 3679,
    'FN': 36054,
    'precision': 0.833029222433196,
    'recall': 0.833029222433196,
    'f1': 0.833029222433196},
   {'threshold': 0.9,
    'total_hits': 357364,
    'total_misses': 45668,
    'accuracy': 0.8866888981520078,
    'TP': 172740,
    'TN': 184624,
    'FP': 2478,
    'FN': 43190,
    'precision': 0.7999814754781642,
    'recall': 0.7999814754781642,
    'f1': 0.7999814754781642}],
  'curves': [{'type': 'ROC',
    'plot_path': '/home/lam/Desktop/Viettel/EMBER2024/my_scripts/RESULTS/images/EMBER2024_Win64_IEF_ROC.png'},
   {'type': 'DET',
    'plot_path': '/home/lam/Desktop/Viettel/EMBER2024/my_scripts/RESULTS/images/EMBER2024_Win64_IEF_DET.png'},
   {'type': 'Actual Positives',
    'plot_path': '/home/lam/Desktop/Viettel/EMBER2024/my_scripts/RESULTS/images/EMBER2024_Win64_IEF_TPR_FNR_per_threshold_aka_Actual_Positives.png'},
   {'type': 'Actual Negatives',
    'plot_path': '/home/lam/Desktop/Viettel/EMBER2024/my_scripts/RESULTS/images/EMBER2024_Win64_IEF_TNR_FFR_per_threshold_aka_Actual_Negatives.png'}],
  'common_stats': {'roc_auc': 0.9884709233310054},
  'MODEL': {'type': 'EMBER2024_LGBM',
   'path': '/home/lam/Desktop/Viettel/EMBER2024/models/EMBER2024_Win64.model',
   'name': 'EMBER2024_Win64'}},
 {'dataset': {'total_count': 403032,
   'malware_count': 215930,
   'benign_count': 187102},
  'stats_per_thresholds': [{'threshold': 0.5,
    'total_hits': 392344,
    'total_misses': 10688,
    'accuracy': 0.9734810139145279,
    'TP': 209208,
    'TN': 183136,
    'FP': 3966,
    'FN': 6722,
    'precision': 0.9688695410549715,
    'recall': 0.9688695410549715,
    'f1': 0.9688695410549715},
   {'threshold': 0.6,
    'total_hits': 391585,
    'total_misses': 11447,
    'accuracy': 0.9715977887611902,
    'TP': 207272,
    'TN': 184313,
    'FP': 2789,
    'FN': 8658,
    'precision': 0.9599036724864539,
    'recall': 0.9599036724864539,
    'f1': 0.9599036724864539},
   {'threshold': 0.7,
    'total_hits': 389627,
    'total_misses': 13405,
    'accuracy': 0.9667396137279421,
    'TP': 204450,
    'TN': 185177,
    'FP': 1925,
    'FN': 11480,
    'precision': 0.9468346223313111,
    'recall': 0.9468346223313111,
    'f1': 0.9468346223313111},
   {'threshold': 0.8,
    'total_hits': 385249,
    'total_misses': 17783,
    'accuracy': 0.955876952698545,
    'TP': 199289,
    'TN': 185960,
    'FP': 1142,
    'FN': 16641,
    'precision': 0.9229333580326958,
    'recall': 0.9229333580326958,
    'f1': 0.9229333580326958},
   {'threshold': 0.85,
    'total_hits': 373298,
    'total_misses': 29734,
    'accuracy': 0.9262242204092975,
    'TP': 187028,
    'TN': 186270,
    'FP': 832,
    'FN': 28902,
    'precision': 0.8661510674755708,
    'recall': 0.8661510674755708,
    'f1': 0.8661510674755708},
   {'threshold': 0.9,
    'total_hits': 362675,
    'total_misses': 40357,
    'accuracy': 0.8998665118402509,
    'TP': 176089,
    'TN': 186586,
    'FP': 516,
    'FN': 39841,
    'precision': 0.8154911313851712,
    'recall': 0.8154911313851712,
    'f1': 0.8154911313851712}],
  'curves': [{'type': 'ROC',
    'plot_path': '/home/lam/Desktop/Viettel/EMBER2024/my_scripts/RESULTS/images/EMBER2024_all_IEF_ROC.png'},
   {'type': 'DET',
    'plot_path': '/home/lam/Desktop/Viettel/EMBER2024/my_scripts/RESULTS/images/EMBER2024_all_IEF_DET.png'},
   {'type': 'Actual Positives',
    'plot_path': '/home/lam/Desktop/Viettel/EMBER2024/my_scripts/RESULTS/images/EMBER2024_all_IEF_TPR_FNR_per_threshold_aka_Actual_Positives.png'},
   {'type': 'Actual Negatives',
    'plot_path': '/home/lam/Desktop/Viettel/EMBER2024/my_scripts/RESULTS/images/EMBER2024_all_IEF_TNR_FFR_per_threshold_aka_Actual_Negatives.png'}],
  'common_stats': {'roc_auc': 0.9963450763698491},
  'MODEL': {'type': 'EMBER2024_LGBM',
   'path': '/home/lam/Desktop/Viettel/EMBER2024/models/EMBER2024_all.model',
   'name': 'EMBER2024_all'}}]

b) Plotting general metrics¶

In [6]:
# Every metric at once; one figure is drawn per threshold.
plot_metrics(RESULTS, PLOT_METRICS.ALL)
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [7]:
# Precision only, one figure per threshold.
plot_metrics(RESULTS, PLOT_METRICS.PRECISION)
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [8]:
# Accuracy and recall on the same axes, one figure per threshold.
plot_metrics(RESULTS, PLOT_METRICS.ACCURACY | PLOT_METRICS.RECALL)
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [9]:
# F1 next to ROC-AUC. ROC-AUC is read from each model's common_stats, so its
# line is the same in every threshold's figure.
plot_metrics(RESULTS, PLOT_METRICS.F1 | PLOT_METRICS.ROC_AUC)
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image

c) Displaying pre-rendered ROC, DET curves¶

In [10]:
# One accordion section per model, holding the curve images listed in its 'curves' entry.
display_prerendered_curves(RESULTS)
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 147.75it/s]